from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Request headers that mimic a desktop browser; some sites reject the
# default python-requests User-Agent.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}

# Search keyword ("securities transaction stamp duty").
keyword = '证券交易印花税'

# Inclusive range of result pages to crawl.
page_start = 1
page_end = 3

# Crawl every page in the range and accumulate {title, url} rows.
news_data = []
for page in range(page_start, page_end + 1):

    # Build the search URL. The keyword is percent-encoded explicitly
    # (UTF-8) so the query string is unambiguous regardless of how the
    # HTTP client would otherwise serialize non-ASCII characters.
    url = (f'http://search.zqrb.cn/search.php?src=all&q={quote(keyword)}'
           f'&f=title&s=newsdate_DESC&p={page}')

    # Fetch the page. A timeout prevents the script from hanging forever
    # on a stalled connection, and raise_for_status() turns HTTP errors
    # into exceptions instead of silently parsing an error page.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    response.encoding = 'utf-8'
    html = response.text

    # Parse the result page.
    soup = BeautifulSoup(html, 'lxml')

    # Each hit's title anchor lives at <dl class="result-list"><dt><a href=...>.
    a_elements = soup.select('dl.result-list > dt > a')
    for a in a_elements:
        # Drop the leading ordinal prefix (e.g. "1 Some title") from the text.
        title = a.get_text().strip().split(' ', 1)[-1]
        # Named `link` (not `url`) so it does not shadow the request URL above.
        link = a.get('href')
        news_data.append({'标题': title, '网址': link})

# Export to CSV. utf-8-sig writes a BOM so Excel detects the encoding
# and displays the Chinese text correctly.
df = pd.DataFrame(news_data)
df.to_csv(f'新闻_{keyword}.csv', index=False, encoding='utf-8-sig')
